import scrapy
from bs4 import BeautifulSoup

from dora_spider.items import DoraSpiderItem


class DoubanSpider(scrapy.Spider):
    """Scrape book entries from the Douban Books Top 250 listing page.

    For every ``<tr class="item">`` row on the page, one ``DoraSpiderItem``
    is yielded with the fields ``title``, ``publish`` and ``score``.
    """

    name = 'douban'
    # Scrapy expects bare domain names here. An entry that contains a path
    # (such as 'book.douban.com/top250') never matches a request's host, so
    # any follow-up request would be filtered out as off-site.
    allowed_domains = ['book.douban.com']
    start_urls = ['https://book.douban.com/top250']

    def parse(self, response, **kwargs):
        """Extract one item per book row from a listing page.

        This is Scrapy's documented default callback for requests that do
        not specify one.

        Args:
            response: The downloaded listing page.
            **kwargs: Extra callback keyword arguments; unused here.

        Yields:
            DoraSpiderItem: Populated with ``title``, ``publish`` and
            ``score`` for each well-formed row. Rows missing any of the
            expected elements are logged and skipped.
        """
        soup = BeautifulSoup(response.text, 'html.parser')
        for row in soup.find_all('tr', class_='item'):
            anchors = row.find_all('a')
            # The title is read from the ``title`` attribute of the row's
            # second anchor (presumably the first one wraps the cover image
            # -- verify against the live markup).
            title = anchors[1].get('title') if len(anchors) > 1 else None
            publish_tag = row.find('p', class_='pl')
            score_tag = row.find('span', class_='rating_nums')

            if title is None or publish_tag is None or score_tag is None:
                # One malformed row must not abort the generator and drop
                # the remaining rows of the page.
                self.logger.warning(
                    'Skipping malformed book row on %s', response.url
                )
                continue

            item = DoraSpiderItem()
            item['title'] = title
            item['publish'] = publish_tag.text
            item['score'] = score_tag.text

            # Hand the item over to the Scrapy engine.
            yield item

    def _parse(self, response, **kwargs):
        """Delegate to :meth:`parse`.

        Kept so that code invoking the former ``_parse`` entry point keeps
        working after the logic moved to the public ``parse`` callback.
        """
        return self.parse(response, **kwargs)
